In [1]:
import warnings
warnings.filterwarnings('ignore')

import math

import glob
from tkinter import *
from tkinter import filedialog
from PIL import ImageTk, Image
import os
import imghdr
import cv2 as cv

import pandas as pd
import numpy as np
from numpy import sqrt

from skimage.transform import pyramid_reduce, resize

import skimage.filters as filters

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.utils import shuffle
                        
#models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

import tensorflow as tf
from tensorflow.keras import datasets, layers, models

from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, Activation

#preprocessing part
from skimage.segmentation import clear_border
from skimage.measure import label,regionprops, perimeter
from skimage.morphology import ball, disk, binary_erosion, remove_small_objects, reconstruction, binary_closing, binary_opening
from skimage.filters import roberts, sobel
from scipy import ndimage as ndi

In [2]:
# Class names. `classes_old` holds the spellings that are searched for in the
# image paths (note "Bengin" -- presumably the dataset's folder spelling),
# `classes` holds the corrected spellings used as labels.
classes_old = ["Bengin","Malignant","Normal"]
classes = ["Benign","Malignant","Normal"]

# Collect the path of every image below the train/test tree, in sorted order.
img_list = sorted(glob.glob('dataset/after_preprocessing/train_test/*/*/*.*'))
print('Total: ', len(img_list))

# Placeholder for the grayscale pixels, shaped (number of images, height, width).
IMG_SIZE = 528
CHANNEL = 1
X_segmented = np.empty((len(img_list), IMG_SIZE, IMG_SIZE), dtype=np.uint8)

y = []

# Load every image as grayscale, resize it and derive its label from its path.
for i, img_path in enumerate(img_list):
    img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        # cv.imread reports an unreadable file by returning None; fail here
        # with the offending path instead of with an opaque error in cv.resize.
        raise ValueError('Could not read image: ' + img_path)
    img = cv.resize(img, (IMG_SIZE, IMG_SIZE))
    X_segmented[i] = img

    if classes_old[0] in img_path:
        y.append(classes[0])
    elif classes_old[1] in img_path:
        y.append(classes[1])
    else:
        # Every path that matches neither of the first two names is labelled
        # with the third class.
        y.append(classes[2])

y = pd.Series(y)

Total:  1385


In [3]:
# From here on the images are handled at 128 x 128 with a single channel.
IMG_SIZE = 128
CHANNEL = 1

# Down-scale every loaded image into a fresh uint8 array.
target_size = (IMG_SIZE, IMG_SIZE)
X_dl_segmented_resize = np.empty((len(img_list),) + target_size, dtype=np.uint8)
for i, img in enumerate(X_segmented):
    X_dl_segmented_resize[i] = cv.resize(img, target_size)

# 4-D layout (samples, height, width, channel) for the convolutional network.
X_dl_segmented_resize = X_dl_segmented_resize.reshape((-1, IMG_SIZE, IMG_SIZE, CHANNEL))

# 2-D layout (samples, flattened pixels) for the classical models.
n_samples = X_dl_segmented_resize.shape[0]
X_ml_segmented_resize = X_dl_segmented_resize.reshape(n_samples, -1)

In [4]:
# Split into train and test sets and encode the string labels as integers.

# Fixed positional split: the first 300 rows are the test slice, rows
# 300..1384 the training slice.
X_dl_train_segmented = X_dl_segmented_resize[300:1385]
X_dl_test_segmented = X_dl_segmented_resize[0:300]

X_ml_train_segmented = X_ml_segmented_resize[300:1385]
X_ml_test_segmented = X_ml_segmented_resize[0:300]

r_rscv = y.copy()

# Encode all labels ONCE, using the order of `classes`, so that code k always
# means classes[k]. Factorizing each slice separately numbers the labels by
# order of first appearance, which can give the same class different codes in
# different slices and need not match the order convertLabels() indexes.
label_codes = pd.Categorical(r_rscv, categories=classes).codes.astype(np.intp)
if (label_codes < 0).any():
    raise ValueError('Found labels that are not listed in `classes`')
label_names = pd.Index(classes)

# Each result keeps the (codes, uniques) tuple layout that
# pandas.Series.factorize() returns, so `y_rscv[0]` still yields the codes.
y_train = (label_codes[300:1385], label_names)
y_test = (label_codes[0:300], label_names)
y_rscv = (label_codes, label_names)

# Random splits: the same number of held-out samples for both layouts. The
# seeds are still drawn at random, but they are stored and printed so that a
# particular run can be reproduced afterwards.
n_holdout = math.floor(len(X_dl_segmented_resize) * 0.2167)

dl_split_seed = int(np.random.randint(1, 1000))
xtr_dl, xts_dl, ytr_dl, yts_dl = train_test_split(
    X_dl_segmented_resize, y_rscv[0], test_size=n_holdout, random_state=dl_split_seed)

ml_split_seed = int(np.random.randint(1, 1000))
xtr_ml, xts_ml, ytr_ml, yts_ml = train_test_split(
    X_ml_segmented_resize, y_rscv[0], test_size=n_holdout, random_state=ml_split_seed)

print('Split seeds (DL, ML): ', dl_split_seed, ml_split_seed)

In [5]:
# Convolutional feature extractor: four stages of Conv2D (3x3, ReLU) followed
# by 2x2 max pooling, with 32, 64, 128 and 256 filters, then a Flatten layer.
# Only the first layer declares the input shape; later layers infer theirs.
# NOTE(review): no compile()/fit() call for this model appears in the visible
# cells, so the features presumably come from the initial weights -- confirm.
feature_layers = []
for stage, n_filters in enumerate((32, 64, 128, 256)):
    conv_kwargs = dict(filters=n_filters, kernel_size=(3, 3), activation='relu')
    if stage == 0:
        conv_kwargs['input_shape'] = (IMG_SIZE, IMG_SIZE, CHANNEL)
    feature_layers.append(layers.Conv2D(**conv_kwargs))
    feature_layers.append(layers.MaxPooling2D((2, 2)))
feature_layers.append(layers.Flatten())

cnn_svm = models.Sequential(feature_layers)

# Run every image through the network to obtain one feature vector per image.
X_full_cnn_svm = cnn_svm.predict(X_dl_segmented_resize)

In [6]:
# Display the shape of the extracted feature matrix:
# (number of images, number of features per image).
X_full_cnn_svm.shape

(1385, 9216)

In [7]:
# Random train/test split of the CNN features; the hold-out size is the same
# fraction (21.67 %) of the sample count that the other splits use.
hybrid_holdout = math.floor(len(X_dl_segmented_resize) * 0.2167)
hybrid_seed = np.random.randint(1, 1000, 1)[0]
xtr_hybird, xts_hybird, ytr_hybird, yts_hybird = train_test_split(
    X_full_cnn_svm,
    y_rscv[0],
    test_size=hybrid_holdout,
    random_state=hybrid_seed,
)

In [8]:
# Random Forest trained on the CNN features. Despite its name, `loaded_model`
# is fitted right here rather than loaded from disk.
rf_params = {
    'n_estimators': 200,
    'min_samples_split': 2,
    'min_samples_leaf': 2,
    'max_features': 'sqrt',
    'max_depth': 80,
    'bootstrap': False,
}
loaded_model = RandomForestClassifier(**rf_params)
loaded_model.fit(xtr_hybird, ytr_hybird)

# Accuracy on the held-out part of the hybrid split.
y_pred_cnn_rf_bestParam = loaded_model.predict(xts_hybird)
cnn_rf_bestParam_segmented_score = accuracy_score(yts_hybird, y_pred_cnn_rf_bestParam)
print('Random Forest Accuracy: ', cnn_rf_bestParam_segmented_score)

Random Forest Accuracy:  0.9233333333333333


In [9]:
def _crop_fractions(num, show_on_window, crop_percentage):
    """Return the (start, end) fractions of each image side kept by the crop."""
    if show_on_window:
        return crop_percentage, 1 - crop_percentage
    # Hand-picked margins for specific image indices; the first match wins.
    if num == 161 or (num >= 173 and num <= 174) or (num == 758):
        return 0.20, 0.80
    if num >= 756 and num <= 767:
        return 0, 1
    if num == 1320 or num == 1219 or (num >= 712 and num <= 767) or (num >= 779 and num <= 799) or (num >= 688 and num <= 699) or (num >= 648 and num <= 664) or (num >= 225 and num <= 234):
        return 0.03, 0.97
    return 0.12, 0.88


def _to_writable(image):
    """Return `image` unchanged, except boolean masks which become 0/255 uint8."""
    if image.dtype == bool:
        return image.astype(np.uint8) * 255
    return image


def get_segmented_lungs(im, num, save=False, plot=False, show_on_window=False, crop_percentage=0.05):
    """Segment the lungs from a 2D slice and return the masked image.

    Parameters
    ----------
    im : array
        Input slice. The callers in this notebook pass a 528 x 528 grayscale
        image read with OpenCV.
    num : int
        Image index. It selects the crop margins when `show_on_window` is
        False and is part of the file names written when `save` is True.
    save : bool
        When True (and `show_on_window` is False) write five intermediate
        images below 'preprocessing/pre1/'.
    plot : bool
        When True pass all intermediate images to `plot_img`.
    show_on_window : bool
        When True crop by `crop_percentage` on every side and write the ten
        intermediate images to 'result/'.
    crop_percentage : float
        Fraction of each side removed by the crop in `show_on_window` mode.

    Returns
    -------
    array
        The cropped image with every pixel outside the lung mask set to 0,
        resized to 528 x 528.
    """
    # Step 1: crop the image.
    height, width = im.shape[:2]
    lo, hi = _crop_fractions(num, show_on_window, crop_percentage)
    crop = im.copy()
    crop = crop[int(height * lo):int(height * hi), int(width * lo):int(width * hi)]

    # Step 2: convert into an inverted binary image (threshold 140).
    _, binary = cv.threshold(crop, 140, 255, cv.THRESH_BINARY_INV)

    # Step 3: remove the blobs connected to the border of the image.
    cleared = clear_border(binary)

    # Step 4: closure operation with a disk of radius 2.
    closing = binary_closing(cleared, disk(2))

    # Step 5: label the connected regions.
    label_image = label(closing)

    # Step 6: keep the regions with the 2 largest areas. All smaller regions
    # are cleared with a single mask instead of pixel by pixel.
    regions = regionprops(label_image)
    areas = sorted(region.area for region in regions)
    if len(areas) > 2:
        small_labels = [region.label for region in regions if region.area < areas[-2]]
        label_image[np.isin(label_image, small_labels)] = 0
    segmented_area = label_image > 0

    # Step 7: erosion with a disk of radius 2, meant to separate lung nodules
    # attached to blood vessels.
    erosion = binary_erosion(segmented_area, disk(2))

    # Step 8: second closure, with a disk of radius 10, meant to keep nodules
    # attached to the lung wall.
    closing2 = binary_closing(erosion, disk(10))

    # Step 9: fill the holes inside the binary lung mask.
    edges = roberts(closing2)
    fill_holes = ndi.binary_fill_holes(edges)

    # Step 10: superimpose the mask on the cropped image by zeroing every
    # pixel that lies outside the mask.
    superimpose = crop.copy()
    outside_mask = fill_holes == 0
    superimpose[outside_mask] = 0

    superimpose = cv.resize(superimpose, (528, 528))

    if show_on_window:
        directory1 = 'result/'
        directory2 = '.jpg'
        images = [im, crop, binary, cleared, closing, segmented_area, erosion, closing2, fill_holes, superimpose]
        titles = ['0_original_image', '1_cropped_image', '2_binary_image', '3_remove_blobs', '4_closure', '5_roi', '6_erosion', '7_closure', '8_fill_hole', '9_result']
        # cv.imwrite does not create missing directories, so make sure the
        # target exists before writing.
        os.makedirs(directory1, exist_ok=True)
        for title, image in zip(titles, images):
            filename = directory1 + title + directory2
            # Boolean masks are converted explicitly instead of relying on a
            # failed write followed by a catch-all `except`.
            cv.imwrite(filename, _to_writable(image))
    elif save:
        directory1 = 'preprocessing/pre1/'
        directory2 = '.jpg'
        images = [crop, binary, cleared, label_image, superimpose]
        titles = ['cropped_image', 'binary_image', 'remove_blobs', 'label', 'result']
        for idx, (title, image) in enumerate(zip(titles, images)):
            filename = directory1 + str(idx + 1) + title + '/' + title + str(num + 1) + directory2
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            cv.imwrite(filename, image)

    if plot:
        images = [im, crop, binary, cleared, closing, label_image, segmented_area, erosion, closing2, fill_holes, superimpose]
        titles = ['Original Image', 
                 'Step 1: Cropped Image', 
                 'Step 2: Binary image', 
                 'Step 3: Remove blobs', 
                 'Step 4: Closure', 
                 'Step 5: Label', 
                 'Step 6: Region On Interest',
                 'Step 7: Erosion',
                 'Step 8: Closure', 
                 'Step 9: Fill Holes',
                 'Step 10: Result']
        # NOTE(review): `plot_img` and `plt` are neither defined nor imported
        # in the visible notebook cells; plot=True needs them provided elsewhere.
        plot_img(images, titles, camp=plt.cm.bone, rows = 3, cols = 4, fontsize= 50)

    return superimpose

In [10]:
def convertLabels(y_test, classes):
    """Translate numeric class codes into their human-readable names.

    Parameters
    ----------
    y_test : int or iterable of int
        A single class code (e.g. one element of a model prediction) or a
        collection of codes.
    classes : sequence
        Class names; the name for code ``k`` is ``classes[k]``.

    Returns
    -------
    The single name for a single code. For a collection of codes that
    ``classes`` cannot be indexed with directly, a list with one name per code.
    """
    try:
        # A single code, or anything `classes` can be indexed with directly.
        return classes[y_test]
    except TypeError:
        # A collection of codes: translate them one by one.
        return [classes[code] for code in y_test]

In [11]:
# Main application window, sized to the full screen resolution.
root = Tk()
root.title('Lung Cancer Prediction System')
width = root.winfo_screenwidth()
height = root.winfo_screenheight()
root.geometry('{:d}x{:d}'.format(width, height))

def openfn():
    """Ask the user for a CT-scan image and remember the chosen path.

    The chosen path is stored in the module-level `image_path`. When the
    dialog is cancelled (an empty result), `image_path` keeps its previous
    value, so the "Reload" button keeps working for the image already shown.

    Returns
    -------
    The value returned by the file dialog (empty when it was cancelled).
    """
    global image_path
    filename = filedialog.askopenfilename(initialdir="test", title ="Select a CT-Scan Image", filetypes=(("jpg files","*.jpg*"),("png files","*.png"),("jpeg files","*.jpeg")))
    if filename:
        image_path = filename
    return filename

def open_img():
    """Let the user pick an image, then segment it, display it and predict.

    Nothing happens when no path is available (for example when the dialog is
    cancelled before any image was chosen), when the file is not detected as a
    PNG/JPEG image, or when OpenCV cannot decode it.
    """
    global image_path
    openfn()
    if not image_path:
        # No file selected: imghdr.what('') would raise.
        return

    # Detect the file type once instead of once per comparison.
    detected_type = imghdr.what(image_path)
    if detected_type not in ('png', 'jpeg', 'jpg'):
        return

    img = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        # cv.imread returns None when the file cannot be decoded.
        return
    img = cv.resize(img, (528, 528))

    numOfImg = 1
    # The dropdown holds the crop as a whole percentage; convert to a fraction.
    get_segmented_lungs(img.copy(), numOfImg, show_on_window=True, crop_percentage=float(int(clicked.get())/100))

    plot_on_app()
    get_prediction_result()
        
def plot_on_app():
    """Show the images found in 'result/' as thumbnails in the window.

    The files are taken in sorted order and placed five per row, starting a
    second row at the sixth image. Thumbnails created by a previous call are
    destroyed first, so repeated calls do not pile up widgets and images.
    """
    # Drop the thumbnails of the previous call (kept on the function object).
    for old_label in getattr(plot_on_app, '_thumbnails', []):
        old_label.destroy()
    thumbnails = []

    result_list = sorted(glob.glob('result/*.*'))
    coor_x = 82
    coor_y = 62
    for i, result_path in enumerate(result_list):
        if i == 5:
            # Start the second row.
            coor_y = coor_y + 320
            coor_x = 82
        # Close the file as soon as the resized copy has been made.
        with Image.open(result_path) as img:
            image = ImageTk.PhotoImage(img.resize((242, 242)))

        label_image = Label(image=image)
        # Keep a reference on the widget so the image is not garbage collected.
        label_image.image = image
        label_image.place(x=coor_x,y=coor_y)
        thumbnails.append(label_image)

        coor_x = coor_x + 280

    plot_on_app._thumbnails = thumbnails

def get_prediction_result():
    """Classify 'result/9_result.jpg' and show the class name in the window.

    The image is read as grayscale, resized to 128 x 128, passed through
    `cnn_svm` to obtain features and then through `loaded_model`; the
    predicted code is converted with `convertLabels` and written into
    `result_label`.
    """
    segmented = cv.imread('result/9_result.jpg', cv.IMREAD_GRAYSCALE)
    segmented = cv.resize(segmented, (128, 128))

    # Batch of one image in (samples, height, width, channel) layout.
    batch = np.expand_dims(segmented, 0).reshape(-1, 128, 128, 1)

    features = cnn_svm.predict(batch)
    predicted_code = loaded_model.predict(features)[0]
    predicted_name = convertLabels(predicted_code, classes)
    result_label.config(text='Prediction Result : ' + str(predicted_name))
    
def recrop():
    """Re-run segmentation, display and prediction for the current image.

    Uses the crop percentage currently selected in the dropdown. Does nothing
    while `image_path` is still the empty string.
    """
    global image_path
    if image_path == '':
        return

    source = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
    source = cv.resize(source, (528, 528))
    numOfImg = 1
    crop_fraction = float(int(clicked.get())/100)
    get_segmented_lungs(source.copy(), numOfImg, show_on_window=True, crop_percentage=crop_fraction)
    plot_on_app()
    get_prediction_result()
    
# Empty frames for the ten processing steps: two rows of five.
for i in range(10):
    grid_row, grid_col = divmod(i, 5)
    Frame(root, highlightbackground="black", highlightthickness=2,width=250, height=250).place(x=80 + 280 * grid_col, y=60 + 320 * grid_row)

txt1 = 'Select a CT-Scan to predict ->'
Label(root, text=txt1, font=('Times', '18', 'italic')).place(x = 40, y = 10)

Button(root, text='Select a CT-Scan Image', font=('Times', '13', 'italic'), command=open_img, bg='#C7C6C1', bd=3).place(x = 350, y = 10) 

# Captions shown below the ten frames, in display order.
titles = ['Original Image', 'Step 1: Cropped Image', 'Step 2: Binary image', 'Step 3: Remove blobs', 'Step 4: Closure'
          , 'Step 5: Region On Interest', 'Step 6: Erosion', 'Step 7: Closure', 'Step 8: Fill Holes', 'Step 9: Segmented Result']

# Label that get_prediction_result() updates with the predicted class.
txt1 = 'Prediction Result : ' 
result_label = Label(root, text=txt1, font=('Times', '18', 'italic'))
result_label.place(x = 40, y = 710)

# Static metric texts (hard-coded figures, not computed in this script).
txt1 = 'Average Test Accuracy : 0.9386, Standard Deviation : 0.0246' 
Label(root, text=txt1, font=('Times', '18', 'italic')).place(x = 770, y = 690)

txt1 = 'Average Precision Score : 0.95, Average Recall Score : 0.94, Average F1 Score : 0.94'
Label(root, text=txt1, font=('Times', '18', 'italic')).place(x = 650, y = 730)

txt1 =  'Crop :            %'
Label(root, text=txt1, font=('Times', '18', 'italic')).place(x = 700, y = 10)

# Dropdown menu options: crop percentages "0" .. "30".
options = [str(percent) for percent in range(31)]

# Holds the selected crop percentage as text; the initial selection is "5".
clicked = StringVar()
clicked.set( "5" )

# Create the dropdown menu. The widget is kept in `drop` and placed in a
# separate statement, because place() returns None.
drop = OptionMenu( root , clicked , *options)
drop.place(x = 772, y = 10)

Button(root, text='Reload', command=recrop, font=('Times', '13', 'italic'), bg='#C7C6C1', bd=3).place(x = 890, y = 10)

# Captions under the frames: an individual x position per caption, one y
# position per row.
coor_x = [130, 370, 660, 940, 1250, 70, 405, 690, 960, 1200]
coor_y = 310
for i in range(10):
    if i == 5:
        coor_y = coor_y + 320
    Label(root, text=titles[i], font=('Times', '18', 'italic')).place(x = coor_x[i], y = coor_y)

# Path of the currently selected image; empty until the user picks a file.
image_path = ''
    
root.mainloop()